{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "96739b70",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd\n",
    "from osgeo import gdal,gdalconst\n",
    "import osgeo.ogr as ogr\n",
    "import osgeo.osr as osr\n",
    "\n",
    "from math import sqrt\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.metrics import r2_score\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from scipy.stats import pearsonr\n",
    "from scipy.optimize import curve_fit\n",
    "\n",
    "import random\n",
    "import math\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9ff8da6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# location = 1, name = 'Europe'\n",
    "# location = 2, name = 'Latin America'\n",
    "# location = 3, name = 'Middle east North Africa'\n",
    "# location = 4, name = 'Oceania'\n",
    "# location = 5, name = 'Russia Near abroad'\n",
    "# location = 6, name = 'South East Asia'\n",
    "# location = 7, name = 'Sub_Saharan Africa'\n",
    "# location = 8, name = 'US Canada'\n",
    "\n",
    "location = 3\n",
    "name = 'Middle east North Africa'\n",
    "address = '/Middle east North Africa/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83417caf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Total nighttime light intensity in each region\n",
    "data = pd.read_csv(address + '/GDP_SUM/2000_2020_' + name + '.csv')\n",
    "x_total = data['GDP_SUM'].to_numpy()\n",
    "y_total = data['NTL_SUM'].to_numpy()\n",
    "\n",
    "def linear(x,a,b):\n",
    "    y = a * x + b\n",
    "    return (y)\n",
    "def __sst(y_no_fitting):\n",
    "    y_mean = sum(y_no_fitting) / len(y_no_fitting)\n",
    "    s_list =[(y - y_mean)**2 for y in y_no_fitting]\n",
    "    sst = sum(s_list)\n",
    "    return sst\n",
    "def __ssr(y_fitting, y_no_fitting):\n",
    "    y_mean = sum(y_no_fitting) / len(y_no_fitting)\n",
    "    s_list =[(y - y_mean)**2 for y in y_fitting]\n",
    "    ssr = sum(s_list)\n",
    "    return ssr\n",
    "def __sse(y_fitting, y_no_fitting):\n",
    "    s_list = [(y_fitting[i] - y_no_fitting[i])**2 for i in range(len(y_fitting))]\n",
    "    sse = sum(s_list)\n",
    "    return sse\n",
    "def goodness_of_fit(y_fitting, y_no_fitting):\n",
    "    SSR = __ssr(y_fitting, y_no_fitting)\n",
    "    SST = __sst(y_no_fitting)\n",
    "    rr = SSR /SST\n",
    "    return rr\n",
    "\n",
    "popt1, pcov1 = curve_fit(linear,x_total,y_total, method='dogbox')\n",
    "y_pre = linear(x_total,popt1[0], popt1[1])\n",
    "r_sq = goodness_of_fit(y_pre,y_total)\n",
    "x_pred_2100 = np.empty(shape=[0,1])\n",
    "df3 = pd.read_csv(address + '/GDP_SUM/2020_2050_SUM.csv')\n",
    "data3 = df3.values[(location-1)*5:location*5,4:] \n",
    "ssps = [1,2,3,4,5]\n",
    "\n",
    "X_test = np.linspace(x_total[0],x_total[-1],100).reshape(-1,1) \n",
    "print(\"R2 = \",r_sq)\n",
    "print(\"y = \",popt1[0],' * x + ',popt1[1])\n",
    "plt.scatter(x_total,y_total,color = 'b')\n",
    "plt.plot(X_test,linear(X_test,popt1[0], popt1[1]),color = 'r')\n",
    "plt.show()\n",
    "y_pred_total = np.empty(shape=[19])\n",
    "for ssp in ssps:\n",
    "    x_pred = data3[ssp-1][:]/1000000000000\n",
    "    y_pred = linear(x_pred.astype(float),popt1[0], popt1[1])\n",
    "    y_pred_total = np.row_stack((y_pred_total,y_pred))\n",
    "    \n",
    "df3 = pd.DataFrame(y_pred_total[1:])\n",
    "df3.index = ['SSP1','SSP2','SSP3','SSP4','SSP5']\n",
    "df3.columns = np.arange(2010,2105,5)\n",
    "df3.to_csv(address + '/GDP_SUM/2020_2050_' + name + '.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44e39f58",
   "metadata": {},
   "outputs": [],
   "source": [
    "r = \"16\"   ### The INDEX of Selected training result (0-19)\n",
    "\n",
    "X_train_temp = pd.read_csv(address + 'Sampling/X_train'+r+'.csv',index_col = 0)\n",
    "X_test_temp = pd.read_csv(address + 'Sampling/X_test'+r+'.csv',index_col = 0)\n",
    "y_train = pd.read_csv(address + 'Sampling/y_train'+r+'.csv',index_col = 0)\n",
    "y_test = pd.read_csv(address + 'Sampling/y_test'+r+'.csv',index_col = 0)\n",
    "y_train = y_train.values.ravel()\n",
    "y_test = y_test.values.ravel()\n",
    "\n",
    "# Selected features\n",
    "X_train = X_train_temp[['NTL_2015', 'South East Asia_2015_3', 'DEM_South East Asia', 'LU_2020_3', 'DistanceToRails']]\n",
    "X_test = X_test_temp[['NTL_2015', 'South East Asia_2015_3', 'DEM_South East Asia', 'LU_2020_3', 'DistanceToRails']]\n",
    "\n",
    "# Hyperparameters\n",
    "r = \"16\"\n",
    "Hyperparameters = pd.read_csv(address + '/' + name + '_RF.csv')\n",
    "Hyperparameters1 = Hyperparameters.values\n",
    "n_estimators=Hyperparameters1[int(r)][1]\n",
    "max_depth=Hyperparameters1[int(r)][2]\n",
    "min_samples_leaf=Hyperparameters1[int(r)][3]\n",
    "min_samples_split=Hyperparameters1[int(r)][4]\n",
    "max_features=Hyperparameters1[int(r)][5]\n",
    "\n",
    "forest = RandomForestRegressor(n_estimators=n_estimators,max_depth=max_depth,\n",
    "                               min_samples_leaf=min_samples_leaf,min_samples_split=min_samples_split,\n",
    "                               max_features=max_features,oob_score=True,\n",
    "                               criterion='squared_error',random_state=1, n_jobs=-1)\n",
    "forest.fit(X_train, y_train)\n",
    "y_train_pred = forest.predict(X_train)\n",
    "y_test_pred = forest.predict(X_test)\n",
    "\n",
    "MSE_train_temp = mean_squared_error(y_train, y_train_pred)\n",
    "MSE_test_temp = mean_squared_error(y_test, y_test_pred)\n",
    "R2_train_temp = r2_score(y_train, y_train_pred)\n",
    "R2_test_temp = r2_score(y_test, y_test_pred)\n",
    "\n",
    "print('MSE train: %.3f, test: %.3f' % (MSE_train_temp,MSE_test_temp))\n",
    "print('R^2 train: %.3f, test: %.3f' % (R2_train_temp,R2_test_temp))\n",
    "print('oob_score：{}'.format(forest.oob_score_))\n",
    "print(\"MAE:\", mean_absolute_error(y_test, y_test_pred))\n",
    "print(\"MSE:\", mean_squared_error(y_test, y_test_pred))\n",
    "rmse = sqrt(mean_squared_error(y_test, y_test_pred))\n",
    "print(\"RMSE:\", rmse)\n",
    "\n",
    "percentage_rmse = (rmse / np.mean(y_test)) * 100\n",
    "print(\"%RMSE:\", percentage_rmse)\n",
    "# Get numerical feature importances\n",
    "importances = list(forest.feature_importances_)\n",
    "feature_list = X_train\n",
    "\n",
    "feature_importances = [(feature, round(importance, 3)) for feature, importance in zip(feature_list, importances)]\n",
    "feature_importances = sorted(feature_importances, key = lambda x: x[1], reverse = True)\n",
    "\n",
    "print(feature_importances)\n",
    "plt.scatter(y_train, y_train_pred, color='blue')\n",
    "plt.scatter(y_test, y_test_pred, color='red')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6abc637c",
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = gdal.Open('/' + name + '2015.tif')  # NTL in 2015\n",
    "geotransform = ds.GetGeoTransform()\n",
    "_Projection = ds.GetProjection()\n",
    "array = ds.ReadAsArray()    # 7881 13628\n",
    "\n",
    "up_bia = 10\n",
    "left_bia = 10\n",
    "r1 = [[-1]*len(array[0])]*up_bia\n",
    "r2 = [[-1]*len(array[0])]*(20-up_bia)\n",
    "c1 = [[-1]*left_bia]*(len(array)+20)\n",
    "c2 = [[-1]*(20-left_bia)]*(len(array)+20)\n",
    "array = np.row_stack((r1,array,r2))\n",
    "array = np.column_stack((c1,array,c2))\n",
    "\n",
    "input_fold = address + 'RF_input'\n",
    "input_file = os.listdir(input_fold)\n",
    "\n",
    "input_y = '/NTL/' + name + '2020.tif' # NTL in 2020"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7baf8fcb",
   "metadata": {},
   "source": [
    "#### Accuracy evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e8edcc0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_tif2(file):\n",
    "    print(\"read \", file)\n",
    "    ds1 = gdal.Open(file)\n",
    "    geotransform1 = ds1.GetGeoTransform()\n",
    "    array1 = ds1.ReadAsArray() \n",
    "    aa = (geotransform1[0]-(geotransform[0]-left_bia*geotransform[1]))/geotransform1[1]\n",
    "    bb = ((geotransform[3]-up_bia*geotransform[5])-geotransform1[3])/geotransform1[5]\n",
    "    left_bia1 = round(aa)\n",
    "    up_bia1 = -round(bb)\n",
    "    r11 = [[-1]*len(array1[0])]*up_bia1\n",
    "    r21 = [[-1]*len(array1[0])]*(len(array)+20-len(array1)-up_bia1)\n",
    "    c11 = [[-1]*left_bia1]*(len(array)+20)\n",
    "    c21 = [[-1]*((len(array[0])+20)-len(array1[0])-left_bia1)]*(len(array)+20)\n",
    "    if r11 == [] and r21 != []:\n",
    "        array1 = np.row_stack((array1,r21))\n",
    "    elif r11 != [] and r21 == []:\n",
    "        array1 = np.row_stack((r11,array1))\n",
    "    elif r11 != [] and r21 != []: \n",
    "        array1 = np.row_stack((r11,array1,r21))\n",
    "    if c11 == [] and c21 != []:\n",
    "        array1 = np.column_stack((array1,c21))\n",
    "    elif c11 != [] and c21 == []:\n",
    "        array1 = np.column_stack((c11,array1))\n",
    "    elif c11 != [] and c21 != []: \n",
    "        array1 = np.column_stack((c11,array1,c21)) \n",
    "    return array1\n",
    "\n",
    "def read_tif3(file):\n",
    "    print(\"read \", file)\n",
    "    ds1 = gdal.Open(file)\n",
    "    geotransform1 = ds1.GetGeoTransform()\n",
    "    array1 = ds1.ReadAsArray() \n",
    "    return array1\n",
    "\n",
    "def get_factors2(cor,js):\n",
    "    global x,y\n",
    "    x = cor[:,0]\n",
    "    y = cor[:,1]\n",
    "    factors2 = np.empty(shape=[js,5]) \n",
    "\n",
    "#     Choose the required features based on their importance ranking\n",
    "    for i in range(len(x)):\n",
    "        factors2[i][2] = array_dem[int(x[i])][int(y[i])]\n",
    "        factors2[i][5] = array_highway[int(x[i])][int(y[i])]\n",
    "        factors2[i][4] = array_rails[int(x[i])][int(y[i])]\n",
    "#         factors2[i][6] = array_water[int(x[i])][int(y[i])]  \n",
    "        factors2[i][0] = array_ntl[int(x[i])][int(y[i])]\n",
    "#         factors2[i][3] = array_lu[int(x[i])][int(y[i])]\n",
    "        factors2[i][3] = array_lu3[int(x[i])][int(y[i])]\n",
    "#         factors2[i][1] = array_lu5[int(x[i])][int(y[i])]\n",
    "#         factors2[i][0] = array_ntllog[int(x[i])][int(y[i])]\n",
    "        factors2[i][1] = array_ntl3[int(x[i])][int(y[i])]\n",
    "#         factors2[i][3] = array_ntl5[int(x[i])][int(y[i])]\n",
    "\n",
    "        for j in [4,5]:   #  The index of Distance Data\n",
    "            if factors2[i][j]<0:\n",
    "                factors2[i][j] = 0\n",
    "\n",
    "    factors3 = pd.DataFrame(factors2)\n",
    "    factors3.columns = ['NTL_2015', 'South East Asia_2015_3', 'DEM_South East Asia', 'LU_2020_3', 'DistanceToRails'] \n",
    "\n",
    "    return factors3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "73dcb012",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select and read only the required features\n",
    "\n",
    "array_dem = read_tif2(input_fold + \"/DEM_\" + name + \".tif\")\n",
    "# array_highway = read_tif2(input_fold + \"/Europe_highways.tif\")\n",
    "array_rails = read_tif2(input_fold + \"/DistanceToRails.tif\")\n",
    "# array_water = read_tif2(input_fold + \"/Europe_water.tif\")\n",
    "# array_lu = read_tif2(input_fold + \"/LU_2020_c.tif\")\n",
    "array_lu3 = read_tif2(input_fold + \"/LU_2020_3.tif\")\n",
    "# array_lu5 = read_tif2(input_fold + \"/LU2015_5.tif\")\n",
    "array_ntl = read_tif2(input_fold + \"/NTL_2015.tif\")\n",
    "# array_ntllog = read_tif2(input_fold + \"/NTL2015log.tif\")\n",
    "array_ntl3 = read_tif2(input_fold + \"/\" + name + \"_2015_3.tif\")\n",
    "# array_ntl5 = read_tif2(input_fold + \"/NTL2015_5.tif\")\n",
    "\n",
    "for i in range(len(array_dem)):\n",
    "    for j in range(len(array_dem[0])):\n",
    "        if array_dem[i][j] == -32768:\n",
    "            array_dem[i][j] = 0\n",
    "            \n",
    "print(\"FINISH.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a9e37c4d",
   "metadata": {},
   "outputs": [],
   "source": [
    "fs = len(array[0])//10 + 1\n",
    "final_pred = np.zeros(shape=[len(array),fs*5])\n",
    "\n",
    "for k in range(10):\n",
    "    array_local = np.empty(shape=[len(array)*fs,2])\n",
    "    js = 0\n",
    "    for i in range(len(array)):\n",
    "        for j in range(fs):\n",
    "            jj = j + k * fs\n",
    "            if jj <= len(array[0])-1:\n",
    "                if array[i][jj] < 0:\n",
    "                    continue\n",
    "                array_local[js][0] = i \n",
    "                array_local[js][1] = jj   \n",
    "                js += 1\n",
    "    if js != 0:\n",
    "        X_all = get_factors2(array_local[:js],js)\n",
    "        X_all = X_all.astype('float32')\n",
    "        y_all = forest.predict(X_all)\n",
    "\n",
    "        for i in range(js):\n",
    "            array_local_x = array_local[i][0]\n",
    "            array_local_y = array_local[i][1]\n",
    "            final_pred[int(array_local_x)][int(array_local_y) % (fs*5)] = y_all[i]\n",
    "\n",
    "    if (k+1) % 5 == 0:\n",
    "        print(k)\n",
    "        pf = pd.DataFrame(final_pred)\n",
    "        pf.to_csv(address + '/Sampling\\k'+str(k)+\".csv\",header = None,index = None)\n",
    "        final_pred = np.zeros(shape=[len(array),fs*5])\n",
    "\n",
    "print(\"read csv\")       \n",
    "df_sum = pd.DataFrame()\n",
    "k = [4,9]\n",
    "for i in k:\n",
    "    df = pd.read_csv(address + '/Sampling\\k'+str(i)+\".csv\",header = None)\n",
    "    df_sum = pd.concat([df_sum,df],axis=1)\n",
    "    print(i)\n",
    "ss = df_sum.to_numpy()\n",
    "tt = ss[:,:len(array[0])]\n",
    "\n",
    "# Actual values\n",
    "infile = '/NTL/' + name + '2020.tif' # NTL in 2020\n",
    "ds1 = gdal.Open(infile)\n",
    "geotransform1 = ds1.GetGeoTransform()\n",
    "array1 = ds1.ReadAsArray() \n",
    "aa = (geotransform1[0]-(geotransform[0]-left_bia*geotransform[1]))/geotransform1[1]\n",
    "bb = ((geotransform[3]-up_bia*geotransform[5])-geotransform1[3])/geotransform1[5]\n",
    "left_bia1 = round(aa)\n",
    "up_bia1 = -round(bb)\n",
    "r11 = [[0]*len(array1[0])]*up_bia1\n",
    "r21 = [[0]*len(array1[0])]*(len(array)-len(array1)-up_bia1)\n",
    "c11 = [[0]*left_bia1]*len(array)\n",
    "c21 = [[0]*(len(array[0])-len(array1[0])-left_bia1)]*len(array)\n",
    "if r11 == [] and r21 != []:\n",
    "    array1 = np.row_stack((array1,r21))\n",
    "elif r11 != [] and r21 == []:\n",
    "    array1 = np.row_stack((r11,array1))\n",
    "elif r11 != [] and r21 != []: \n",
    "    array1 = np.row_stack((r11,array1,r21))\n",
    "if c11 == [] and c21 != []:\n",
    "    array1 = np.column_stack((array1,c21))\n",
    "elif c11 != [] and c21 == []:\n",
    "    array1 = np.column_stack((c11,array1))\n",
    "elif c11 != [] and c21 != []: \n",
    "    array1 = np.column_stack((c11,array1,c21)) \n",
    "    \n",
    "x_temp_1 = np.zeros(shape = [math.ceil(len(array)*len(array[0])*0.7)])\n",
    "y_temp_1 = np.zeros(shape = [math.ceil(len(array)*len(array[0])*0.7)])\n",
    "\n",
    "js = 0\n",
    "# tt: Predicted values；array1：Actual values\n",
    "for i in range(len(array)):\n",
    "    for j in range(len(array[0])):\n",
    "        if array[i][j] >= 0:\n",
    "            x_temp_1[js] = tt[i][j]\n",
    "            y_temp_1[js] = array1[i][j]\n",
    "            js += 1\n",
    "            \n",
    "x_2020 = x_temp_1[:js]\n",
    "y_2020 = y_temp_1[:js]\n",
    "y_sum_temp = y_2020.sum() \n",
    "\n",
    "# Scale proportionally\n",
    "x_temp = tt.flatten()\n",
    "x_sum_temp = x_temp.sum() \n",
    "rate_ntl = y_sum_temp / x_sum_temp\n",
    "tt_new = tt * rate_ntl\n",
    "\n",
    "for i in range(len(tt_new)):\n",
    "    for j in range(len(tt_new[0])):\n",
    "        if tt_new[i][j] <= intercept:\n",
    "            tt_new[i][j] = 0\n",
    "\n",
    "_GeoTransform = (geotransform[0]-left_bia*geotransform[1], geotransform[1], geotransform[2],\\\n",
    "                 geotransform[3]-up_bia*geotransform[5], geotransform[4], geotransform[5])\n",
    "driver = gdal.GetDriverByName(\"GTiff\")\n",
    "outdata = driver.Create(address + '/RF_output/' + name + '2020_pre.tif',\n",
    "                        len(array[0]),len(array),\n",
    "                        1,gdalconst.GDT_Float32)\n",
    "outdata.SetGeoTransform(_GeoTransform)\n",
    "outdata.SetProjection(_Projection)\n",
    "outband=outdata.GetRasterBand(1)   \n",
    "outband.WriteArray(tt_new)\n",
    "outdata.FlushCache()\n",
    "outdata = None\n",
    "print(\"finish.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8d4f5209",
   "metadata": {},
   "source": [
    "# Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fd564d86",
   "metadata": {},
   "outputs": [],
   "source": [
    "def NTL_3(NTLinput,NTLoutput): \n",
    "    array_lu = read_tif3(NTLinput)   \n",
    "    len_row = len(array_lu)\n",
    "    len_col = len(array_lu[0])\n",
    "    row_times = (len_row // 5000 + 1)\n",
    "    col_times = (len_col // 5000 + 1)\n",
    "    print(\"len_row = \",len_row,\", len_col = \",len_col)\n",
    "    print(\"row_times = \",row_times,\", col_times = \",col_times) \n",
    "    \n",
    "    # Calculate the surrounding values\n",
    "    array_pre = np.empty(shape = [5000,5000])\n",
    "    for row_time in range(row_times):\n",
    "        for col_time in range(col_times):\n",
    "            for ii in range(5000):\n",
    "                i = ii + 5000 * row_time\n",
    "                if i > (len_row-1):\n",
    "                    break\n",
    "                for jj in range(5000):\n",
    "                    j = jj + 5000 * col_time\n",
    "                    if j > (len_col-1):\n",
    "                        break\n",
    "                    if array[i][j] < 0:\n",
    "                        array_pre[ii][jj] = -1\n",
    "                    else: \n",
    "                        count = 0\n",
    "                        array_temp = 0\n",
    "                        if array[i][j] >= 0:\n",
    "                            array_temp = array_temp + array[i][j]\n",
    "                            count += 1\n",
    "                        if j < (len_col-1) and array[i][j+1] >= 0:\n",
    "                            count += 1\n",
    "                            array_temp = array_temp + array[i][j+1]\n",
    "                        if j > 0 and array[i][j-1] >= 0:\n",
    "                            count += 1\n",
    "                            array_temp = array_temp + array[i][j-1]    \n",
    "                        if i < (len_row-1) and array[i+1][j] >= 0:\n",
    "                            count += 1\n",
    "                            array_temp = array_temp + array[i+1][j]\n",
    "                        if i < (len_row-1) and j < (len_col-1) and array[i+1][j+1] >= 0:\n",
    "                            count += 1\n",
    "                            array_temp = array_temp + array[i+1][j+1]\n",
    "                        if i < (len_row-1) and j > 0 and array[i+1][j-1] >= 0:\n",
    "                            count += 1\n",
    "                            array_temp = array_temp + array[i+1][j-1]\n",
    "                        if i > 0 and array[i-1][j] >= 0:\n",
    "                            count += 1\n",
    "                            array_temp = array_temp + array[i-1][j]\n",
    "                        if i > 0 and j < (len_col-1) and array[i-1][j+1] >= 0:\n",
    "                            count += 1\n",
    "                            array_temp = array_temp + array[i-1][j+1]\n",
    "                        if i > 0 and j > 0 and array[i-1][j-1] >= 0:\n",
    "                            count += 1\n",
    "                            array_temp = array_temp + array[i-1][j-1]\n",
    "                        if count != 0:\n",
    "                            array_pre[ii][jj] = array_temp/count\n",
    "                        elif count == 0:\n",
    "                            array_pre[ii][jj] = -1\n",
    "                if ii %1000 == 0:\n",
    "                    print(ii)\n",
    "            pf = pd.DataFrame(array_pre)\n",
    "            pf.to_csv( address1 + '/Sampling/NTLsum_'+str(row_time)+ '_' +str(col_time)+ \".csv\",header = None,index = None)\n",
    "            array_pre = np.empty(shape = [5000,5000])\n",
    "\n",
    "    df_col = pd.DataFrame()\n",
    "    df_sum = pd.DataFrame()\n",
    "    for row in range(row_times):\n",
    "        for col in range(col_times):\n",
    "            df = pd.read_csv(address1 + '/Sampling/NTLsum_'+str(row)+ '_' +str(col)+ \".csv\",header = None)\n",
    "            df_col = pd.concat([df_col,df],axis=1)\n",
    "        df_sum = pd.concat([df_sum,df_col])\n",
    "        df_col = pd.DataFrame()\n",
    "    array_sum1 = df_sum.to_numpy()\n",
    "    array_sum = array_sum1[:len_row,:len_col]\n",
    "\n",
    "    _GeoTransform = (geotransform[0]-left_bia*geotransform[1], geotransform[1], geotransform[2],\\\n",
    "                 geotransform[3]-up_bia*geotransform[5], geotransform[4], geotransform[5])\n",
    "    driver = gdal.GetDriverByName(\"GTiff\")\n",
    "    outdata = driver.Create(NTLoutput,len_col,len_row,\n",
    "                            1,gdalconst.GDT_Float32)\n",
    "    outdata.SetGeoTransform(_GeoTransform)\n",
    "    outdata.SetProjection(_Projection)\n",
    "    outband=outdata.GetRasterBand(1)  \n",
    "    outband.WriteArray(array_sum)\n",
    "    outdata.FlushCache()\n",
    "    outdata = None       \n",
    "    print(NTLoutput)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60602810",
   "metadata": {},
   "outputs": [],
   "source": [
    "def pred2025_2050(ssp_,year_):    \n",
    "    if year_ == 2020:\n",
    "        ntl_file = '/NTL/' + name + '2020.tif'   # Here use actual 2020 nighttime light data\n",
    "        array_ntl = read_tif2(ntl_file)\n",
    "        ntl_3_output = address1 + '/NTL_3/NTL_SSP'+ str(ssp_) + '_' + str(year_)+ '_3.tif'\n",
    "        NTL_3(ntl_file,ntl_3_output)\n",
    "        array_ntl3 = read_tif2(ntl_3_output)\n",
    "        \n",
    "        lu_file =  address + '/LU_3/' + name + '_SSP' + str(ssp_) + '_2020_c.tif'\n",
    "        lu_file3 =  address + '/LU_3/' + name + '_SSP' + str(ssp_) + '_2020_3.tif'\n",
    "        array_lu = read_tif2(lu_file)\n",
    "        array_lu3 = read_tif2(lu_file3) \n",
    "    else:\n",
    "        ntl_file = address1 + '/RF_output/NTL_SSP'+ str(ssp_) + '_' + str(year_)+ '.tif'\n",
    "        array_ntl = read_tif2(ntl_file)\n",
    "        ntl_3_output = address1 + '/NTL_3/NTL_SSP'+ str(ssp_) + '_' + str(year_)+ '_3.tif'\n",
    "        NTL_3(ntl_file,ntl_3_output)\n",
    "        array_ntl3 = read_tif2(ntl_3_output)\n",
    "        \n",
    "        if str(year_)[-1] == '5':\n",
    "            lu_file = address + '/LU_3/' + name + '_SSP'+ str(ssp_) + '_' + str(year_+5) + '_c.tif'  \n",
    "            lu_file3 = address + '/LU_3/' + name + '_SSP'+ str(ssp_) + '_' + str(year_+5) + '_3.tif'\n",
    "        elif str(year_)[-1] == '0':    \n",
    "            lu_file = address + '/LU_3/' + name + '_SSP'+ str(ssp_) + '_' + str(year_) + '_c.tif'  \n",
    "            lu_file3 = address + '/LU_3/' + name + '_SSP'+ str(ssp_) + '_' + str(year_) + '_3.tif'  \n",
    "        array_lu = read_tif2(lu_file)\n",
    "        array_lu3 = read_tif2(lu_file3)\n",
    "    print(\"Finish Array.\")   \n",
    "    \n",
    "    \n",
    "    fs = len(array[0])//10 + 1\n",
    "    final_pred = np.zeros(shape=[len(array),fs*5])\n",
    "\n",
    "    for k in range(10):\n",
    "        array_local = np.empty(shape=[len(array)*fs,2])\n",
    "        js = 0\n",
    "        for i in range(len(array)):\n",
    "            for j in range(fs):\n",
    "                jj = j + k * fs\n",
    "                if jj <= len(array[0])-1:\n",
    "                    if array[i][jj] < 0:\n",
    "                        continue\n",
    "                    array_local[js][0] = i \n",
    "                    array_local[js][1] = jj   \n",
    "                    js += 1\n",
    "        if js != 0:\n",
    "            cor = array_local[:js]\n",
    "            x = cor[:,0]\n",
    "            y = cor[:,1]\n",
    "            factors2 = np.empty(shape=[js,5]) \n",
    "            \n",
    "            #     Choose the required features based on their importance ranking\n",
    "            for i in range(len(x)):\n",
    "                factors2[i][2] = array_dem[int(x[i])][int(y[i])]\n",
    "                factors2[i][5] = array_highway[int(x[i])][int(y[i])]\n",
    "                factors2[i][4] = array_rails[int(x[i])][int(y[i])]\n",
    "        #         factors2[i][6] = array_water[int(x[i])][int(y[i])]  \n",
    "                factors2[i][0] = array_ntl[int(x[i])][int(y[i])]\n",
    "        #         factors2[i][3] = array_lu[int(x[i])][int(y[i])]\n",
    "                factors2[i][3] = array_lu3[int(x[i])][int(y[i])]\n",
    "        #         factors2[i][1] = array_lu5[int(x[i])][int(y[i])]\n",
    "        #         factors2[i][0] = array_ntllog[int(x[i])][int(y[i])]\n",
    "                factors2[i][1] = array_ntl3[int(x[i])][int(y[i])]\n",
    "        #         factors2[i][3] = array_ntl5[int(x[i])][int(y[i])]\n",
    "\n",
    "                for j in [4,5]:   #  The index of Distance Data\n",
    "                    if factors2[i][j]<0:\n",
    "                        factors2[i][j] = 0\n",
    "\n",
    "            factors3 = pd.DataFrame(factors2)\n",
    "            factors3.columns = ['NTL_2015', 'South East Asia_2015_3', 'DEM_South East Asia', 'LU_2020_3', 'DistanceToRails'] \n",
    "            X_all = factors3   \n",
    "            X_all = X_all.astype('float32')              \n",
    "            y_all = forest.predict(X_all)\n",
    "\n",
    "            for i in range(js):\n",
    "                array_local_x = array_local[i][0]\n",
    "                array_local_y = array_local[i][1]\n",
    "                final_pred[int(array_local_x)][int(array_local_y)% (fs*5)] = y_all[i]\n",
    "        if (k+1) % 5 == 0:\n",
    "            print(k)\n",
    "            pf = pd.DataFrame(final_pred)\n",
    "            pf.to_csv(address + '/Sampling\\k'+str(k)+\".csv\",header = None,index = None)\n",
    "            final_pred = np.zeros(shape=[len(array),fs*5])\n",
    "    print(\"k finished.\")\n",
    "\n",
    "    df_sum = pd.DataFrame()\n",
    "    k = [4,9]\n",
    "    for i in k:\n",
    "        df = pd.read_csv(address + '/Sampling\\k'+str(i)+\".csv\",header = None)\n",
    "        df_sum = pd.concat([df_sum,df],axis=1)\n",
    "        print(i)\n",
    "    ss = df_sum.to_numpy()\n",
    "    tt = ss[:,:len(array[0])]\n",
    "    \n",
    "    # Scale proportionally\n",
    "    y_temp = pd.read_csv(address + '/GDP_SUM/2020_2050_' + name + '.csv',index_col = 0)\n",
    "    y_sum_temp = y_temp[str(year_+5)].loc[\"SSP\"+str(ssp_)] \n",
    "    x_temp = tt.flatten()\n",
    "    x_sum_temp = x_temp.sum() \n",
    "    rate_ntl = y_sum_temp / x_sum_temp\n",
    "    tt_new = tt * rate_ntl\n",
    "    \n",
    "    # Remove values between 0 and 1\n",
    "    tt_new = np.where(tt_new<1,0,tt_new)\n",
    "    \n",
    "    _GeoTransform = (geotransform[0]-left_bia*geotransform[1], geotransform[1], geotransform[2],\\\n",
    "                     geotransform[3]-up_bia*geotransform[5], geotransform[4], geotransform[5]) ###   \n",
    "    driver = gdal.GetDriverByName(\"GTiff\")\n",
    "    tif_name = address + '/RF_output/NTL_SSP'+ str(ssp_) + '_' + str(year_+5)+ '.tif'\n",
    "    outdata = driver.Create(tif_name,\n",
    "                            len(array[0]),len(array),\n",
    "                            1,gdalconst.GDT_Float32)\n",
    "    outdata.SetGeoTransform(_GeoTransform)\n",
    "    outdata.SetProjection(_Projection)\n",
    "    outband=outdata.GetRasterBand(1)  \n",
    "    outband.WriteArray(tt_new) \n",
    "    outdata.FlushCache()\n",
    "    outdata = None\n",
    "    print(\"finish.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af832a19",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# main\n",
    "ssps_ = [1,2,3,4,5]\n",
    "years_ = [2020,2025,2030,2035,2040,2045]\n",
    "\n",
    "for ssp_ in ssps_:\n",
    "    for year_ in years_:\n",
    "        print('SSP',ssp_,'_',year_,'Start.')\n",
    "        pred2025_2050(ssp_,year_)\n",
    "        print('SSP',ssp_,'_',year_,'Finish.')\n",
    "print(\"Finish All.\")           "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39caa8c0",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
